{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "inputWidgets": {},
     "nuid": "d5d73c24-4d5d-4072-bc39-ec1280befb8b",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# restaurant-orders\n",
    "\n",
    "从订单列表中统计 Plain Papadum 每年的销售额。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "inputWidgets": {},
     "nuid": "eda6c7c0-62cb-4fd3-a26f-10ad63b5a9e2",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import datetime\n",
    "\n",
    "# Configuration: input CSV location, the menu item to report on, and the\n",
    "# timestamp layout of the date field in each order line.\n",
    "ORDERS_CSV_PATH = \"/mnt/databrickscontainer1/restaurant-1-orders.csv\"\n",
    "ITEM_NAME = \"Plain Papadum\"\n",
    "ORDER_DATE_FORMAT = \"%d/%m/%Y %H:%M\"\n",
    "\n",
    "# Read the raw order file as an RDD of text lines.\n",
    "fileRdd = sc.textFile(ORDERS_CSV_PATH)\n",
    "\n",
    "# Split every line on commas and drop the header row.\n",
    "dataRdd = fileRdd.map(lambda x: x.split(\",\")).filter(lambda x: x[0] != \"Order Number\")\n",
    "\n",
    "# Keep only the order lines whose item name (field 2) is the selected item.\n",
    "papadumRdd = dataRdd.filter(lambda x: x[2] == ITEM_NAME)\n",
    "\n",
    "# Build (year, line amount) pairs: parse the date string in field 1 and\n",
    "# multiply quantity (field 3) by unit price (field 4).\n",
    "# Tuples are used because they are the documented key-value form for reduceByKey.\n",
    "dateRdd = papadumRdd.map(\n",
    "    lambda x: (\n",
    "        datetime.datetime.strptime(x[1], ORDER_DATE_FORMAT).year,\n",
    "        float(x[3]) * float(x[4]),\n",
    "    )\n",
    ")\n",
    "\n",
    "# Sum the amounts per year. The result is reused by several actions below,\n",
    "# so persist it; otherwise Spark may recompute it for each action.\n",
    "totalRdd = dateRdd.reduceByKey(lambda a, b: a + b).cache()\n",
    "\n",
    "# Raw aggregation result (no ordering applied).\n",
    "print(totalRdd.collect())\n",
    "# Aggregation result ordered by year.\n",
    "print(totalRdd.sortByKey().collect())\n",
    "# Aggregation result ordered by sales amount.\n",
    "print(totalRdd.sortBy(lambda x: x[1]).collect())"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 4
   },
   "notebookName": "restaurant-orders",
   "notebookOrigID": 2180139033493834,
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "",
   "name": ""
  },
  "language_info": {
   "name": ""
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
